{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import requests\n",
    "import pandas as pd\n",
    "import time\n",
    "import datetime\n",
    "import os\n",
    "import json\n",
    "from tqdm import tqdm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "BASE_DIR = os.path.abspath(os.path.join(os.getcwd(), \"..\", \"..\"))\n",
    "OUTPUT_DIR = os.path.join(BASE_DIR, \"build\", \"output\")\n",
    "INPUT_DIR = os.path.join(BASE_DIR, \"build\", \"input\")\n",
    "WAYBACK_POOLS_DIR = os.path.join(BASE_DIR, \"build\", \"input\", \"defillama_pool_list_wayback_machine\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Function to get pool data from the API; note this only returns the current list of pools, so defunct pools are missing (which I address with wayback machine below)\n",
    "def get_pool_data():\n",
    "    url = \"https://yields.llama.fi/pools\"\n",
    "    headers = {'accept': '*/*'}\n",
    "\n",
    "    response = requests.get(url, headers=headers)\n",
    "    print(response)\n",
    "\n",
    "    pools = response.json().get('data', [])\n",
    "    df = pd.DataFrame(pools)\n",
    "\n",
    "    today_date = datetime.datetime.now().strftime(\"%Y-%m-%d\")\n",
    "    df['snapshot_date'] = today_date\n",
    "    \n",
    "    return df\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_wayback_pools(directory_path):\n",
    "    historical_data = []\n",
    "\n",
    "    # Iterate through all JSON files in the directory\n",
    "    for filename in os.listdir(directory_path):\n",
    "        if filename.endswith(\".json\"):\n",
    "            file_path = os.path.join(directory_path, filename)\n",
    "            \n",
    "            # Extract snapshot_date from the filename\n",
    "            try:\n",
    "                # Find the segment that contains the snapshot date\n",
    "                snapshot_date = datetime.datetime.strptime(filename.split('web:')[1][:8], \"%Y%m%d\").strftime(\"%Y-%m-%d\")\n",
    "\n",
    "            except IndexError:\n",
    "                print(f\"Could not extract snapshot_date from filename: {filename}\")\n",
    "                continue\n",
    "\n",
    "            with open(file_path, 'r') as file:\n",
    "                try:\n",
    "                    data = json.load(file)\n",
    "                    if 'data' in data:\n",
    "                        # Add snapshot_date to each entry\n",
    "                        for entry in data['data']:\n",
    "                            entry['snapshot_date'] = snapshot_date\n",
    "                        historical_data.extend(data['data'])  # Append data list from the JSON\n",
    "                except Exception as e:\n",
    "                    print(f\"Error reading {filename}: {e}\")\n",
    "    \n",
    "    # Convert the accumulated data to a DataFrame\n",
    "    df_historical = pd.DataFrame(historical_data)\n",
    "    return df_historical\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_historical_pool_data(pool_id):\n",
    "    url = f\"https://yields.llama.fi/chart/{pool_id}\"\n",
    "    headers = {'accept': '*/*'}\n",
    "    max_retries = 5\n",
    "    retry_delay = 2\n",
    "\n",
    "    for attempt in range(max_retries):\n",
    "        response = requests.get(url, headers=headers)\n",
    "        if response.status_code == 200:\n",
    "            print(f\"Fetched historical data for pool {pool_id}.\")\n",
    "            return response.json()\n",
    "        elif response.status_code == 429:  # Rate-limited\n",
    "            print(f\"Rate-limited. Retrying in {retry_delay} seconds...\")\n",
    "            time.sleep(retry_delay)\n",
    "            retry_delay *= 2  # Exponential backoff\n",
    "        else:\n",
    "            print(f\"Error fetching data for pool {pool_id}. Status code: {response.status_code}\")\n",
    "            return None\n",
    "\n",
    "    print(f\"Failed to fetch data for pool {pool_id} after {max_retries} attempts.\")\n",
    "    return None\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# this function gets the type of protocol (Lending, Dexes, etc.)\n",
    "def get_protocol_characteristics():\n",
    "    url = \"https://api.llama.fi/protocols\"\n",
    "    headers = {'accept': '*/*'}\n",
    "\n",
    "    response = requests.get(url, headers=headers)\n",
    "    if response.status_code == 200:\n",
    "        # Parse the response JSON and keep only relevant columns\n",
    "        protocols = response.json()\n",
    "        df = pd.DataFrame(protocols)\n",
    "        df = df[['slug', 'name', 'category']]  # Keep only these columns\n",
    "        df.rename(columns={'slug': 'project'}, inplace=True)  # Rename 'slug' to 'project'\n",
    "        return df\n",
    "    else:\n",
    "        # Handle errors\n",
    "        print(f\"Error: Unable to fetch data. Status code {response.status_code}\")\n",
    "        return None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_data_from_pool_list(pool_list, df_pools_for_merge):\n",
    "    df_list = []\n",
    "\n",
    "    # Loop through each pool ID in the stablecoin list and fetch the historical data\n",
    "    for pool_id in tqdm(pool_list, desc=\"Fetching pool data\", unit=\"pool\"):\n",
    "        \n",
    "        # Fetch the historical data for the pool\n",
    "        historical_data = get_historical_pool_data(pool_id)\n",
    "        \n",
    "        # If data was fetched, convert it to a DataFrame and add pool and symbol columns\n",
    "        if historical_data:\n",
    "            try: \n",
    "                pool_data = historical_data['data']\n",
    "                \n",
    "                pool_data_df = pd.DataFrame(historical_data)\n",
    "                pool_data_df['pool'] = pool_id\n",
    "                df_data = pd.json_normalize(pool_data_df['data'])\n",
    "\n",
    "                # Combine unnested data with the original DataFrame (without the 'data' column)\n",
    "                pool_data_df = pd.concat([pool_data_df.drop(columns=['data']), df_data], axis=1)\n",
    "\n",
    "                # merge with pool characteristics\n",
    "                pool_data_df = pd.merge(pool_data_df, df_pools_for_merge, on = 'pool', how='left')\n",
    "                \n",
    "                # Add the DataFrame to the list\n",
    "                df_list.append(pool_data_df)\n",
    "            except Exception as e:\n",
    "                    print(f\"Error processing pool {pool_id}: {e}\")\n",
    "        else:\n",
    "            print(f\"No data returned for pool {pool_id}.\")\n",
    "\n",
    "        # Sleep to avoid hitting rate limits\n",
    "        time.sleep(2)\n",
    "\n",
    "    df_panel = pd.concat(df_list, ignore_index=True)\n",
    "    return df_panel\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Execute the function to fetch and save the data\n",
    "df_protocol_characteristics = get_protocol_characteristics()\n",
    "df_pools = get_pool_data()\n",
    "df_pools_historical = get_wayback_pools(\"/path/to/build/input/defillama_pool_list_wayback_machine\")\n",
    "\n",
    "# Keep only relevant columns\n",
    "columns_of_interest = ['chain', 'project', 'symbol', 'pool', 'stablecoin', 'exposure', 'snapshot_date']\n",
    "df_pools = df_pools[columns_of_interest]\n",
    "df_pools_historical = df_pools_historical[columns_of_interest]\n",
    "\n",
    "# Combine current and historical data, dropping duplicates\n",
    "df_pools_combined = pd.concat([df_pools, df_pools_historical])\n",
    "\n",
    "# Ensure the most recent snapshot is kept for each pool\n",
    "df_pools_combined['snapshot_date'] = pd.to_datetime(df_pools_combined['snapshot_date'])\n",
    "df_pools_combined = df_pools_combined.sort_values(by=['pool', 'snapshot_date'], ascending=[True, False])\n",
    "df_pools_including_defunct = df_pools_combined.drop_duplicates(subset='pool')\n",
    "\n",
    "# Merge protocol data with pool characteristics\n",
    "df_pools_including_defunct = pd.merge(\n",
    "    df_pools_including_defunct,\n",
    "    df_protocol_characteristics,\n",
    "    on='project',\n",
    "    how='left'\n",
    ")\n",
    "\n",
    "df_pools_of_interest = df_pools_including_defunct.query(\n",
    "    'category == \"Lending\" or category == \"CDP\"'\n",
    ")\n",
    "\n",
    "pools_of_interest_list = df_pools_of_interest['pool'].tolist()\n",
    "\n",
    "df_pools_for_merge = df_pools_including_defunct[['chain', 'project', 'symbol', 'pool', 'stablecoin', 'exposure', 'category', 'name', 'snapshot_date']]\n",
    "\n",
    "print(len(pools_of_interest_list))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_panel = get_data_from_pool_list(pools_of_interest_list, df_pools_for_merge)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "timestamp = datetime.datetime.now().strftime(\"%Y%m%d_%H%M%S\")\n",
    "df_panel_name = os.path.join(OUTPUT_DIR, f\"defillama_lending_rates_{timestamp}.csv\")\n",
    "df_panel.to_csv(df_panel_name, index=False)\n",
    "\n",
    "df_pools_name = os.path.join(OUTPUT_DIR, f\"defillama_lending_pools_{timestamp}.csv\")\n",
    "df_pools.to_csv(df_pools_name, index=False)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "leverage_and_stablecoin_pegs",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
